################################################
# Analysis for:
#
# Jordan, Soren and Andrew Q. Philips. ``Improving the interpretation
#  of Random Effects Regression Results.'' Political Studies Review
#  DOI: 10.1177/14789299211068418
#
# Required files: W-replication.dta
#
################################################

library(lattice)
library(gridExtra)
library(tidyr)
library(readstata13)
library(foreign)
library(plm)
library(dplyr)
library(ggpubr)
library(reshape2)
library(ggplot2)
library(devtools)

##############################################
##############################################
##############################################
# Figure 1: The ratio of sigma_alpha to sigma_epsilon against various T's to calculate theta

T <- 50

sigma2.a <- 10
sigma2.e <- 20
sigma2.a/sigma2.e
1 - sqrt(sigma2.e /(T*sigma2.a + sigma2.e))

sigma2.a <- 100
sigma2.e <- 200
sigma2.a/sigma2.e
1 - sqrt(sigma2.e /(T*sigma2.a + sigma2.e))

sigma2.a <- sigma2.e <- c(seq(0.1,1, by = 0.2), seq(2,100, by = 5))
dat <- as.data.frame(cbind(sigma2.a, sigma2.e))

dat.comb <- expand(dat, sigma2.a, sigma2.e) # expand across all combinations
dat.comb$theta.10 <- 1 - sqrt(dat.comb$sigma2.e /(10*dat.comb$sigma2.a + dat.comb$sigma2.e))
dat.comb$theta.25 <- 1 - sqrt(dat.comb$sigma2.e /(25*dat.comb$sigma2.a + dat.comb$sigma2.e))
dat.comb$theta.50 <- 1 - sqrt(dat.comb$sigma2.e /(50*dat.comb$sigma2.a + dat.comb$sigma2.e))
dat.comb$theta.100 <- 1 - sqrt(dat.comb$sigma2.e /(100*dat.comb$sigma2.a + dat.comb$sigma2.e))

dat.comb.fac <- dat.comb
dat.comb.fac$sigma2.a <- as.factor(dat.comb.fac$sigma2.a)
dat.comb.fac$sigma2.e <- as.factor(dat.comb.fac$sigma2.e)

# Figure 1a
g1.2d <- ggplot(data = dat.comb.fac, aes(x = sigma2.a, y = sigma2.e, fill = theta.10)) + 
	geom_tile() + 
	scale_fill_gradient(low = "grey10", high = "grey90", name = expression(paste(paste(theta), " (T = 10)"))) + 
	theme_bw() + 
	scale_x_discrete(name = expression(paste(sigma[alpha], ""^"2")),
					breaks = c("0.1", "0.5", "0.9", "22", "42", "62", "82", "97"), 
					labels = c("0.1", "0.5", "0.9", "20", "40", "60", "80", "100")) + 
 	scale_y_discrete(name = expression(paste(sigma[epsilon], ""^"2")),
					breaks = c("0.1", "0.5", "0.9", "22", "42", "62", "82", "97"), 
					labels = c("0.1", "0.5", "0.9", "20", "40", "60", "80", "100"))
 
# Figure 1b
g4.2d <- ggplot(data = dat.comb.fac, aes(x = sigma2.a, y = sigma2.e, fill = theta.100)) + 
	geom_tile() + 
	scale_fill_gradient(low = "grey10", high = "grey90", name = expression(paste(paste(theta), " (T = 100)"))) + 
	theme_bw() + 
	scale_x_discrete(name = expression(paste(sigma[alpha], ""^"2")),
					breaks = c("0.1", "0.5", "0.9", "22", "42", "62", "82", "97"), 
					labels = c("0.1", "0.5", "0.9", "20", "40", "60", "80", "100")) + 
 	scale_y_discrete(name = expression(paste(sigma[epsilon], ""^"2")),
					breaks = c("0.1", "0.5", "0.9", "22", "42", "62", "82", "97"), 
					labels = c("0.1", "0.5", "0.9", "20", "40", "60", "80", "100"))
 



##############################################
##############################################
##############################################
# Table and Figure 2. Replicating Williams (2017)

data <- read.dta13("/.../W-replication.dta")

names(data)

# Convert project types and groupings to numeric
data$dy_group_num <- as.numeric(as.character(data$dy_group))

table(as.numeric(as.factor(data$projtype_cs)))

names(data)
data$proj2 <- ifelse(as.numeric(as.factor(data$projtype_cs)) == 3, 1, 0)
data$proj3 <- ifelse(as.numeric(as.factor(data$projtype_cs)) == 4, 1, 0)
data$proj4 <- ifelse(as.numeric(as.factor(data$projtype_cs)) == 5, 1, 0)
data$proj5 <- ifelse(as.numeric(as.factor(data$projtype_cs)) == 6, 1, 0)
data$proj6 <- ifelse(as.numeric(as.factor(data$projtype_cs)) == 7, 1, 0)
data$proj7 <- ifelse(as.numeric(as.factor(data$projtype_cs)) == 8, 1, 0)
data$proj8 <- ifelse(as.numeric(as.factor(data$projtype_cs)) == 9, 1, 0)
data$proj9 <- ifelse(as.numeric(as.factor(data$projtype_cs)) == 10, 1, 0)
data$proj10 <- ifelse(as.numeric(as.factor(data$projtype_cs)) == 11, 1, 0)
data$proj11 <- ifelse(as.numeric(as.factor(data$projtype_cs)) == 12, 1, 0)
data$proj12 <- ifelse(as.numeric(as.factor(data$projtype_cs)) == 13, 1, 0)
data$proj13 <- ifelse(as.numeric(as.factor(data$projtype_cs)) == 14, 1, 0)
data$proj14 <- ifelse(as.numeric(as.factor(data$projtype_cs)) == 15, 1, 0)
data$proj15 <- ifelse(as.numeric(as.factor(data$projtype_cs)) == 16, 1, 0)
data$proj16 <- ifelse(as.numeric(as.factor(data$projtype_cs)) == 17, 1, 0)
data$proj17 <- ifelse(as.numeric(as.factor(data$projtype_cs)) == 18, 1, 0)
data$proj18 <- ifelse(as.numeric(as.factor(data$projtype_cs)) == 19, 1, 0)
data$proj19 <- ifelse(as.numeric(as.factor(data$projtype_cs)) == 20, 1, 0)
data$proj20 <- ifelse(as.numeric(as.factor(data$projtype_cs)) == 21, 1, 0)
data$proj21 <- ifelse(as.numeric(as.factor(data$projtype_cs)) == 22, 1, 0)
data$proj22 <- ifelse(as.numeric(as.factor(data$projtype_cs)) == 23, 1, 0)

model.plm <- plm(proj_comp_binary ~ fundsource_pie_dacf +
	ecpres2008_voteshare_ndc + consttype_const + consttype_maintrehab + projyear_2 +
	projyear_3 + proj2 + proj3 + proj4 + proj5 + proj6 + proj7 + proj8 + proj9 +
	proj10 + proj11 + proj12 + proj13 + proj14 + proj15 + proj16 + proj17 +
	proj18 + proj19 + proj20 + proj21 + proj22 + year2012 + year2013, 
		index = c("dy_group"),
		data = subset(data, fundsource_pie2=="dacf" | fundsource_pie2=="ddf"),
		model = "random", random.method = "swar", effect = "individual")

# Table 2
summary(model.plm)

# Use the function to illustrate the demeaning.
install_github("andyphilips/qdmean")
library(qdmean)

ecpres2008_voteshare_ndc_qdemeaned <- qdmean(model = model.plm, predictor = "ecpres2008_voteshare_ndc", 
										group = "dy_group")

# Yup. Now, the leverage. What should we interpret? The -2sd/+2sd range was
data.small.model <- data.frame(model.plm$model, index(model.plm), summary(model.plm)$ercomp$theta)
names(data.small.model) <- c(names(model.plm$model), names(index(model.plm)), "theta")
c(quantile(data.small.model$ecpres2008_voteshare_ndc, 0.025), 
	quantile(data.small.model$ecpres2008_voteshare_ndc, 0.5),
	quantile(data.small.model$ecpres2008_voteshare_ndc, 0.975))

# But it is now
c((mean(ecpres2008_voteshare_ndc_qdemeaned) - sd(ecpres2008_voteshare_ndc_qdemeaned)), 
	(mean(ecpres2008_voteshare_ndc_qdemeaned)),
	(mean(ecpres2008_voteshare_ndc_qdemeaned) + sd(ecpres2008_voteshare_ndc_qdemeaned)))

gg.dat <- data.frame(c(ecpres2008_voteshare_ndc_qdemeaned, data.small.model$ecpres2008_voteshare_ndc), 
	c(rep("Demeaned", length(ecpres2008_voteshare_ndc_qdemeaned)), rep("Original", length(data.small.model$ecpres2008_voteshare_ndc))))
names(gg.dat) <- c("Density", "Variable")



# Figure 2
ggplot(gg.dat, aes(x = Density, fill = Variable)) + 
	geom_density(alpha = 0.5) +
	xlim(-0.01, 1) + 
	labs(x = "NDC Vote Share 2008", y = "Density") +
	scale_fill_grey(start = 0.9, end = 0) +
	theme_classic() + theme(axis.text.y = element_blank()) +
	annotate("text", x = 0.5, y = 0.8, label = "Original Range Interpreted") +
	geom_segment(aes(x = 0.1, y = 0.5, xend = 0.9, yend = 0.5), lty = 2, size = 1.25) +
	geom_segment(aes(x = 0.1, y = 0.25, xend = 0.1, yend = 0.75), lty = 2, size = 1.25) +
	geom_segment(aes(x = 0.9, y = 0.25, xend = 0.9, yend = 0.75), lty = 2, size = 1.25) +
	annotate("text", x = 0.472635, y = 0.1, label = "Original Mean", size = 4) + #, col = "darkgray") +
	geom_segment(aes(x = 0.472635, y = 0.25, xend = 0.472635, yend = 0.5), lty = 1, size = 1.25) + #, col = "darkred") +
	annotate("text", x = 0.1907204, y = 4.3, label = "+/- S.D. Range of Demeaned", size = 4, col = "grey30") +
	geom_segment(aes(x = 0.09599293, y = 4, xend = 0.28544796, yend = 4), lty = 3, size = 1.25, col = "grey30") +
	geom_segment(aes(x = 0.09599293, y = 3.75, xend = 0.09599293, yend = 4.25), lty = 3, size = 1.25, col = "grey30") +
	geom_segment(aes(x = 0.28544796, y = 3.75, xend = 0.28544796, yend = 4.25), lty = 3, size = 1.25, col = "grey30") +
	annotate("text", x = 0.1907204, y = 3.6, label = "New Mean", size = 4, col = "grey30") +
	geom_segment(aes(x = 0.1907204, y = 3.75, xend = 0.1907204, yend = 4), lty = 1, size = 1.2, col = "grey30") #, col = "darkblue") 
dev.off()
